# -*- coding: utf-8 -*-
import json

import scrapy

from apps.models import OTA
from apps.models.comment_scrapy import CommentLvmama

'''
Lvmama spider: so far only the Meituan-sourced comments are crawled.
TODO: add the remaining comment sources later.
'''


class LvmamaSpider(scrapy.Spider):
    """Crawl scenic-spot comments from m.lvmama.com and upsert them into MongoDB.

    Each entry in ``base_url`` is a paginated comment-list API template for one
    scenic spot; ``parse`` seeds page 0 for every template and ``parse_data``
    stores the comments and follows pagination for its own template only.
    """
    name = 'lvmama'
    allowed_domains = ['m.lvmama.com']
    start_urls = ['https://m.lvmama.com/']
    base_url = [
        'https://m.lvmama.com/other/router/rest.do?method=api.com.csa.cmt.getCmtCommentList&version=3.0.0&productId=183707&recommend=0&newComment=0&picture=0&best=0&good=0&bad=0&relative=0&currentPage={page_num}&pageSize={page_size}&isELong=N&categoryName=PLACE&mainPlaceId=100025&firstChannel=TOUCH&secondChannel=LVMM',
        # 石燕湖 (Shiyan Lake)
        'https://m.lvmama.com/other/router/rest.do?method=api.com.csa.cmt.getCmtCommentList&version=3.0.0&productId=256492&platForm=&recommend=0&newComment=0&picture=0&best=0&good=0&bad=0&relative=0&currentPage={page_num}&pageSize={page_size}&isELong=N&categoryName=PLACE&mainPlaceId=103113&firstChannel=TOUCH&secondChannel=LVMM'
        # 石牛寨 (Shiniuzhai)
    ]
    page_size = 8

    # NOTE(review): the API appears to accept this fixed "signal" header;
    # confirm whether it expires or must be generated per session.
    _headers = {"signal": " ab4494b2-f532-4f99-b57e-7ca121a137ca"}

    def parse(self, response):
        """Seed page 0 of every configured comment-list URL template.

        The template itself is carried in ``meta`` so that ``parse_data`` can
        paginate its own spot independently (the previous implementation
        re-entered ``parse`` on every page, re-requesting ALL templates each
        time and growing the request count exponentially).
        """
        for url_template in self.base_url:
            url = url_template.format(page_num=0, page_size=self.page_size)
            yield scrapy.Request(
                url=url,
                callback=self.parse_data,
                headers=self._headers,
                dont_filter=True,
                meta={'page_num': 0, 'url_template': url_template},
            )

    def parse_data(self, response):
        """Upsert one page of comments, then request the next page if any.

        Comments are stored unconditionally (the old code skipped the final
        page because persistence was nested inside the ``hasNext`` check);
        pagination only continues while the API reports more pages.
        """
        self.logger.info('crawling comment data from %s', response.url)
        result = json.loads(response.body)
        data = result['data']

        for value in data.get('list') or []:
            # Upsert keyed on (spot id, comment id) so re-crawls stay idempotent.
            CommentLvmama.objects(
                ota_spot_id=value['productId'],
                comment_id=value['commentId'],
            ).update_one(
                set__created_time=value['createdTime'],
                set__avg_score=value['avgScore'],
                set__cmt_latitudes=value['cmtLatitudes'],
                set__cmt_picture_list=value['cmtPictureList'],
                set__content=value['content'],
                set__user_id=value['userId'],
                set__user_img=value['userImg'],
                set__user_name=value['userName'],
                # Optional fields: read with .get() instead of setdefault()
                # so the parsed payload is not mutated as a side effect.
                set__vst_name=value.get('vstName', ''),
                set__vst_picture=value.get('vst_picture', ''),
                set__vst_sp_name=value.get('vstSpName', ''),
                upsert=True,
            )

        if data.get('hasNext'):
            page_num = response.meta['page_num'] + 1
            url_template = response.meta['url_template']
            yield scrapy.Request(
                url=url_template.format(page_num=page_num, page_size=self.page_size),
                callback=self.parse_data,
                headers=self._headers,
                dont_filter=True,
                meta={'page_num': page_num, 'url_template': url_template},
            )
